Visualising the effect sizes and significance for differential gene expression in differentiated and treated astrocytes.
# load de outputs
res.file<-paste0(workdir, "/analysed_data/differential_analysis_BMP4_CNTF.xlsx")
dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
# replace long sheet name from the keys
dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
# select comparisons of interest
dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
setNames(nm = dge_res_sheets$sheets)
# dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# "CNTF_diff_3w - progenitor|untreated")]
dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]# create function for plot
plot_volcano <- function(i) {
# select relevant comparison
de = dgeBulk_sel[[i]]
name=names(dgeBulk_sel)[i]
# print(name)
# ensure empty gene symbols are replaced with ensembl IDs
de[is.na(de$symbol),]$symbol <- de[is.na(de$symbol),]$id
# add a column of NAs
de$diffexpressed <- NA
# if log2Foldchange > 0.0 and pvalue < 0.05, set as "UP"
de$diffexpressed[de$log2FoldChange > 0.0 & de$pvalue < 0.05] <- "UP"
# if log2Foldchange < -0.0 and pvalue < 0.05, set as "DOWN"
de$diffexpressed[de$log2FoldChange < -0.0 & de$pvalue < 0.05] <- "DOWN"
de$delabel <- NA
# label all up and downregulated genes
# de[!(is.na(de$diffexpressed)),]$delabel <- de[!(is.na(de$diffexpressed)),]$symbol
# label top up and downregulated genes
# largest -log10(p-value)
lowN = c("BMP4_diff_3w - BMP4_diff_2w|untreated" , "CNTF_diff_3w - CNTF_diff_2w|untreated")
highN = dge_res_sheets_key$Comparison[-c(3,10)]
if (name %in% lowN) {
N = 5 # number of genes - will depend on the number of significant gene distribution for the DE comparison
} else {N = 10}
sym = de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$symbol
de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$delabel <- paste0("italic('",sym,"')" )
# largest log2(FC), also significant
sigde <- de %>% dplyr::filter(pvalue < 0.05)
labelid <- sigde[head(order(abs(sigde$log2FoldChange), decreasing = TRUE),7),]$id
sym = de[de$id %in% labelid,]$symbol
de[de$id %in% labelid,]$delabel <- paste0("italic('",sym,"')" )
xlim = max(abs(de$log2FoldChange))+(max(abs(de$log2FoldChange))*0.3)
ylim = max(-log10(de$pvalue))+(max(-log10(de$pvalue))*1)
uplabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="UP")
dwlabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="DOWN")
vol_plot <- ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
geom_point(size = 1, alpha = 0.5) +
scale_x_continuous(expand = c(0, 0), limits=c(-xlim,xlim)) +
scale_y_continuous(expand = c(0, 0), limits=c(0,ylim)) +
theme_classic(base_size=18) +
geom_label_repel(
data = dwlabel,
aes(label = delabel),
size=6,
min.segment.length = unit(0.5, 'lines'),
nudge_y = 5.9,
# nudge_x = 7.9,
xlim = c(NA, -1),
max.overlaps=Inf,
parse = TRUE,
seed = 1) +
geom_label_repel(
data = uplabel,
aes(label = delabel),
size=6,
min.segment.length = unit(0.5, 'lines'),
nudge_y = 5.9,
# nudge_x = 7.9,
xlim = c(1, NA),
max.overlaps=Inf,
parse = TRUE,
seed = 1) +
scale_color_manual(values=c("#788feb", "#942025", "black")) +
# geom_vline(xintercept=c(-0.6, 0.6), col="black") +
geom_hline(yintercept=-log10(0.05), col="black", linetype = 2) +
ggtitle(name) +
theme(plot.title = element_text(hjust = 0.5, size=18),
legend.position = "none")
ggsave(file=paste0(workdir,"/volcano_plot/volcano_",name,".svg"), plot=vol_plot, width=6.5, height=6.5)
return(vol_plot)
## extra: for gene-term plot querying
# de_genes <- de[!is.na(de$delabel),]$delabel
}# apply function to all comparisons
lapply(1:length(dgeBulk_sel), plot_volcano)## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
# apply function to select comparison
i=3
lapply(i, plot_volcano)## [[1]]
# load differentiated BMP4 and CNTF marker genes
res.file<-paste0(workdir, "/analysed_data/differential_analysis_wo2W_BMP4_CNTF_wo2W.xlsx")
dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
# replace long sheet name from the keys
dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
# select comparisons of interest
dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
setNames(nm = dge_res_sheets$sheets)
# dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# "CNTF_diff_3w - progenitor|untreated")]
dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]# create function for plot
plot_volcano <- function(i) {
# select relevant comparison
de = dgeBulk_sel[[i]]
name=names(dgeBulk_sel)[i]
# print(name)
# ensure empty gene symbols are replaced with ensembl IDs
de[is.na(de$symbol),]$symbol <- de[is.na(de$symbol),]$id
# add a column of NAs
de$diffexpressed <- NA
# if log2Foldchange > 0.0 and pvalue < 0.05, set as "UP"
de$diffexpressed[de$log2FoldChange > 0.0 & de$pvalue < 0.05] <- "UP"
# if log2Foldchange < -0.0 and pvalue < 0.05, set as "DOWN"
de$diffexpressed[de$log2FoldChange < -0.0 & de$pvalue < 0.05] <- "DOWN"
de$delabel <- NA
# label all up and downregulated genes
# de[!(is.na(de$diffexpressed)),]$delabel <- de[!(is.na(de$diffexpressed)),]$symbol
# label top up and downregulated genes
# largest -log10(p-value)
lowN = c("BMP4_diff_3w - BMP4_diff_2w|untreated" , "CNTF_diff_3w - CNTF_diff_2w|untreated")
highN = dge_res_sheets_key$Comparison[-c(3,10)]
if (name %in% lowN) {
N = 5 # number of genes - will depend on the number of significant gene distribution for the DE comparison
} else {N = 10}
sym = de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$symbol
de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$delabel <- paste0("italic('",sym,"')" )
# largest log2(FC), also significant
sigde <- de %>% dplyr::filter(pvalue < 0.05)
labelid <- sigde[head(order(abs(sigde$log2FoldChange), decreasing = TRUE),7),]$id
sym = de[de$id %in% labelid,]$symbol
de[de$id %in% labelid,]$delabel <- paste0("italic('",sym,"')" )
xlim = max(abs(de$log2FoldChange))+(max(abs(de$log2FoldChange))*0.3)
ylim = max(-log10(de$pvalue))+(max(-log10(de$pvalue))*0.5)
ynud =
uplabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="UP")
dwlabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="DOWN")
vol_plot <- ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
geom_point(size = 1, alpha = 0.5) +
scale_x_continuous(expand = c(0, 0), limits=c(-xlim,xlim)) +
scale_y_continuous(expand = c(0, 0), limits=c(0,ylim)) +
theme_classic(base_size=18) +
geom_label_repel(
data = dwlabel,
aes(label = delabel),
size=6,
min.segment.length = unit(0.5, 'lines'),
nudge_y = 5.9,
# nudge_x = 7.9,
xlim = c(NA, -1),
max.overlaps=Inf,
parse = TRUE,
seed = 1) +
geom_label_repel(
data = uplabel,
aes(label = delabel),
size=6,
min.segment.length = unit(0.5, 'lines'),
nudge_y = 5.9,
# nudge_x = 7.9,
xlim = c(1, NA),
max.overlaps=Inf,
parse = TRUE,
seed = 1) +
scale_color_manual(values=c("#788feb", "#942025", "black")) +
# geom_vline(xintercept=c(-0.6, 0.6), col="black") +
geom_hline(yintercept=-log10(0.05), col="black", linetype = 2) +
ggtitle(name) +
theme(plot.title = element_text(hjust = 0.5, size=18),
legend.position = "none")
# ggsave(file=paste0(workdir,"/volcano_plot/volcano_",name,".svg"), plot=vol_plot, width=5.5, height=5.5)
return(vol_plot)
## extra: for gene-term plot querying
# de_genes <- de[!is.na(de$delabel),]$delabel
}# apply function to all comparisons
lapply(1:length(dgeBulk_sel), plot_volcano)## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
# apply function to select comparison
i=3
lapply(i, plot_volcano)## [[1]]